data

library(rpart)
library(mclust)
## Package 'mclust' version 6.0.0
## Type 'citation("mclust")' for citing this R package in publications.
library(parallel)
library(e1071)
library(randomForest)
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
zip = read.csv("/course/data/2024-yong/lab07/zip.csv")
dim(zip)
## [1] 9298  257
data <- zip[zip$digit == 6 | zip$digit == 8, ]

str(data)
## 'data.frame':    1542 obs. of  257 variables:
##  $ digit: int  6 8 8 6 6 6 6 6 8 6 ...
##  $ p1   : num  -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
##  $ p2   : num  -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
##  $ p3   : num  -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
##  $ p4   : num  -1 -1 -1 -1 -1 -1 -1 -1 -0.599 -1 ...
##  $ p5   : num  -1 -1 -0.837 -1 -1 -1 -1 -1 0.83 -1 ...
##  $ p6   : num  -1 -1 -0.303 -1 -1 -1 -1 -1 1 -1 ...
##  $ p7   : num  -1 -1 0.48 -1 -1 -1 -1 -1 1 -1 ...
##  $ p8   : num  -1 -0.936 0.764 -0.458 -0.048 -0.996 -1 -1 0.904 -0.835 ...
##  $ p9   : num  -0.72 -0.123 0.611 0.903 0.989 0.275 -0.192 -0.832 -0.542 0.858 ...
##  $ p10  : num  0.525 0.861 0.394 0.964 -0.282 -0.165 0.151 0.782 -1 -0.543 ...
##  $ p11  : num  -0.392 -0.066 -0.361 -0.407 -1 -1 -1 0.189 -1 -1 ...
##  $ p12  : num  -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
##  $ p13  : num  -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
##  $ p14  : num  -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
##  $ p15  : num  -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
##  $ p16  : num  -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
##  $ p17  : num  -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
##  $ p18  : num  -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
##  $ p19  : num  -1 -1 -1 -1 -1 -1 -1 -1 -0.533 -1 ...
##  $ p20  : num  -1 -1 -0.553 -1 -1 -1 -1 -1 0.852 -1 ...
##  $ p21  : num  -1 -0.941 0.998 -1 -1 -1 -1 -1 1 -1 ...
##  $ p22  : num  -1 0.374 1 -1 -1 -1 -1 -1 1 -1 ...
##  $ p23  : num  -1 0.647 1 -0.104 -0.645 -1 -1 -1 1 -1 ...
##  $ p24  : num  -0.961 0.823 1 0.999 0.997 -0.444 -0.896 -0.868 1 -0.279 ...
##  $ p25  : num  0.716 0.941 1 1 0.996 0.835 0.84 0.518 0.921 1 ...
##  $ p26  : num  0.298 0.506 1 0.994 -0.511 -0.667 -0.253 1 0.818 -0.527 ...
##  $ p27  : num  -0.979 1 1 -0.296 -1 -1 -1 0.581 0.818 -1 ...
##  $ p28  : num  -1 -0.637 0.055 -1 -1 -1 -1 -1 0.152 -1 ...
##  $ p29  : num  -1 -1 -1 -1 -1 -1 -1 -1 0.091 -1 ...
##  $ p30  : num  -1 -1 -1 -1 -1 -1 -1 -1 -0.608 -1 ...
##  $ p31  : num  -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
##  $ p32  : num  -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
##  $ p33  : num  -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
##  $ p34  : num  -1 -1 -1 -1 -1 -1 -1 -1 -0.98 -1 ...
##  $ p35  : num  -1 -1 -0.8 -1 -1 -1 -1 -1 0.461 -1 ...
##  $ p36  : num  -1 -1 0.834 -1 -1 -1 -1 -1 1 -1 ...
##  $ p37  : num  -1 -0.303 1 -0.961 -1 -1 -1 -1 0.836 -1 ...
##  $ p38  : num  -1 1 1 0.37 -1 -1 -1 -1 -0.485 -1 ...
##  $ p39  : num  -1 0.498 0.3 0.979 0.216 -1 -1 -1 0.202 -0.795 ...
##  $ p40  : num  0.045 0.414 -0.338 0.936 1 0.579 -0.338 0.115 1 0.757 ...
##  $ p41  : num  0.569 0.919 -0.744 0.05 0.352 0.187 0.912 1 1 0.774 ...
##  $ p42  : num  -0.903 -0.018 -0.418 -0.79 -0.996 -1 -0.883 0.767 1 -0.981 ...
##  $ p43  : num  -1 1 0.305 -1 -1 -1 -1 -0.597 1 -1 ...
##  $ p44  : num  -1 -0.383 -0.118 -1 -1 -1 -1 -1 1 -1 ...
##  $ p45  : num  -1 -1 0.267 -1 -1 -1 -1 -1 1 -1 ...
##  $ p46  : num  -1 -1 -0.164 -1 -1 -1 -1 -1 0.806 -1 ...
##  $ p47  : num  -1 -1 -1 -1 -1 -1 -1 -1 -0.751 -1 ...
##  $ p48  : num  -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
##  $ p49  : num  -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
##  $ p50  : num  -1 -1 -1 -1 -1 -1 -1 -1 -0.754 -1 ...
##  $ p51  : num  -1 -1 0.375 -1 -1 -1 -1 -1 1 -1 ...
##  $ p52  : num  -1 -1 1 -1 -1 -1 -1 -1 1 -1 ...
##  $ p53  : num  -1 -0.558 0.817 0.311 -1 -1 -1 -1 0.043 -1 ...
##  $ p54  : num  -1 1 -0.454 1 -0.399 -1 -1 -1 -1 -1 ...
##  $ p55  : num  -0.568 -0.148 -0.987 0.935 0.97 -0.264 -1 -0.664 -0.773 0.114 ...
##  $ p56  : num  0.961 -1 -1 -0.408 0.94 0.986 0.589 0.976 0.508 0.998 ...
##  $ p57  : num  -0.55 -0.601 -1 -1 -0.714 -0.725 0.402 0.695 1 -0.276 ...
##  $ p58  : num  -1 0.628 -1 -1 -1 -1 -1 -0.808 0.544 -1 ...
##  $ p59  : num  -1 1 -0.817 -1 -1 -1 -1 -1 -0.454 -1 ...
##  $ p60  : num  -1 -0.349 0.442 -1 -1 -1 -1 -1 -0.454 -1 ...
##  $ p61  : num  -1 -1 1 -1 -1 -1 -1 -1 0.663 -1 ...
##  $ p62  : num  -1 -1 0.5 -1 -1 -1 -1 -1 1 -1 ...
##  $ p63  : num  -1 -1 -1 -1 -1 -1 -1 -1 0.009 -1 ...
##  $ p64  : num  -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
##  $ p65  : num  -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
##  $ p66  : num  -1 -1 -1 -1 -1 -1 -1 -1 -0.642 -1 ...
##  $ p67  : num  -1 -1 0.215 -1 -1 -1 -1 -1 1 -1 ...
##  $ p68  : num  -1 -1 1 -0.236 -1 -1 -1 -1 1 -1 ...
##  $ p69  : num  -1 -1 0.035 0.965 -0.855 -1 -1 -1 -0.068 -1 ...
##  $ p70  : num  -0.998 0.551 -1 1 0.839 -0.814 -1 -0.998 -1 -0.852 ...
##  $ p71  : num  0.489 0.703 -1 0.109 1 0.798 -0.636 0.581 -1 0.86 ...
##  $ p72  : num  0.054 -0.987 -1 -1 0.096 0.213 0.991 0.99 -1 0.152 ...
##  $ p73  : num  -1 -1 -1 -1 -1 -1 -0.45 -0.402 -0.552 -1 ...
##  $ p74  : num  -1 0.274 -0.709 -1 -1 -1 -1 -1 -0.918 -1 ...
##  $ p75  : num  -1 1 0.799 -1 -1 -1 -1 -1 -1 -1 ...
##  $ p76  : num  -1 -0.62 1 -1 -1 -1 -1 -1 -1 -1 ...
##  $ p77  : num  -1 -1 0.834 -1 -1 -1 -1 -1 -0.46 -1 ...
##  $ p78  : num  -1 -1 -0.674 -1 -1 -1 -1 -1 1 -1 ...
##  $ p79  : num  -1 -1 -1 -1 -1 -1 -1 -1 0.824 -1 ...
##  $ p80  : num  -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
##  $ p81  : num  -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
##  $ p82  : num  -1 -1 -1 -1 -1 -1 -1 -1 -0.965 -1 ...
##  $ p83  : num  -1 -1 -0.438 -0.747 -1 -1 -1 -1 0.633 -1 ...
##  $ p84  : num  -1 -1 0.993 0.828 -1 -1 -1 -1 1 -1 ...
##  $ p85  : num  -1 -1 0.718 1 0.113 -1 -1 -1 0.949 -0.967 ...
##  $ p86  : num  -0.418 -0.735 -0.523 0.259 1 0.05 -1 -0.209 -0.307 0.365 ...
##  $ p87  : num  0.942 0.98 -1 -0.984 0.829 0.894 0.139 0.998 -0.963 0.635 ...
##  $ p88  : num  -0.781 -0.421 -1 -1 -0.863 -0.747 0.761 0.364 -1 -0.935 ...
##  $ p89  : num  -1 -0.532 -0.852 -1 -1 -1 -0.988 -1 -1 -1 ...
##  $ p90  : num  -1 0.88 0.782 -1 -1 -1 -1 -1 -1 -1 ...
##  $ p91  : num  -1 0.82 1 -1 -1 -1 -1 -1 -1 -1 ...
##  $ p92  : num  -1 -0.96 0.852 -1 -1 -1 -1 -1 -0.707 -1 ...
##  $ p93  : num  -1 -1 -0.657 -1 -1 -1 -1 -1 0.408 -1 ...
##  $ p94  : num  -1 -1 -1 -1 -1 -1 -1 -1 1 -1 ...
##  $ p95  : num  -1 -1 -1 -1 -1 -1 -1 -1 0.72 -1 ...
##  $ p96  : num  -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
##  $ p97  : num  -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
##  $ p98  : num  -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
##   [list output truncated]
set.seed(769)

Variable Selection

1.Ensemble methods are good tools for finding important predictor variables. In aid of Random Forest, find the two most important predictors out of 256 ones (in terms of Accuracy), for classifying between observations with digit = 6 and digit = 8.

set.seed(769)
# Convert the response variable to a factor
data$digit <- as.factor(data$digit)
rf <- randomForest(digit ~ ., data, importance = TRUE)
rf
## 
## Call:
##  randomForest(formula = digit ~ ., data = data, importance = TRUE) 
##                Type of random forest: classification
##                      Number of trees: 500
## No. of variables tried at each split: 16
## 
##         OOB estimate of  error rate: 1.04%
## Confusion matrix:
##     6   8 class.error
## 6 822  12 0.014388489
## 8   4 704 0.005649718
plot(rf)

set.seed(769)
import <- importance(rf, type=1)

import_sorted <- import[order(-import[, "MeanDecreaseAccuracy"]), , drop = FALSE]
(top_2 <- head(import_sorted, 2))
##     MeanDecreaseAccuracy
## p75             12.55577
## p91             11.58263
ops_rf <- randomForest(digit ~ p75 + p91, data)
ops_rf
## 
## Call:
##  randomForest(formula = digit ~ p75 + p91, data = data) 
##                Type of random forest: classification
##                      Number of trees: 500
## No. of variables tried at each split: 1
## 
##         OOB estimate of  error rate: 7.2%
## Confusion matrix:
##     6   8 class.error
## 6 774  60  0.07194245
## 8  51 657  0.07203390

What is the OOB error using all predictors? What is the OOB error if only the two selected predictors are used with Random Forest?

The OOB error is 0.97%, if use two use two selected predictors the OOB is increase to 6.36%.

To create another subset with two predictors, consider the top ten most important predictors given by the Random Forest model for 256 predictors from Question 1. The first most important predictor will still be used, but the second one is the least correlated with the top one, out of the remaining 9 predictors.

set.seed(769)
# 10 most imprtant 
top_10 <- rownames(head(import_sorted, 10))

# most important 
top_predictor <- top_10[1]
# the rest
remaining_pred <- top_10[-1]

corr <- sapply(remaining_pred, function(pred){
  cor(data[[top_predictor]], data[[pred]])
})


# selecting least correlated predictior
(least <- names(which.min(abs(corr))))
## [1] "p37"
# create substring with two selected pred
subdata <- data[,c("digit",top_predictor, least)]
str(subdata)
## 'data.frame':    1542 obs. of  3 variables:
##  $ digit: Factor w/ 2 levels "6","8": 1 2 2 1 1 1 1 1 2 1 ...
##  $ p75  : num  -1 1 0.799 -1 -1 -1 -1 -1 -1 -1 ...
##  $ p37  : num  -1 -0.303 1 -0.961 -1 -1 -1 -1 0.836 -1 ...

What is the OOB error if these two selected predictors are used with Random Forest? Do you think these two predictors form a better subset of size 2 than the one obtained in Question 1?

set.seed(769)
rf2 <- randomForest(digit ~. , subdata, importance = T)
rf2
## 
## Call:
##  randomForest(formula = digit ~ ., data = subdata, importance = T) 
##                Type of random forest: classification
##                      Number of trees: 500
## No. of variables tried at each split: 1
## 
##         OOB estimate of  error rate: 6.1%
## Confusion matrix:
##     6   8 class.error
## 6 805  29  0.03477218
## 8  65 643  0.09180791

The OOB error this time is 6.36% which superisely is the same as last time.

The subset of data with two digit classes and the two predictor variables found in Question 2 will be used in Questions 3-5 for clustering and in Questions 6-8 for support vector machines.

Clustering

3.Without using the digit variable, run the K-means algorithm to partition the images into K = 2, 3, …, 7 clusters, respectively.

set.seed(769)
ari = double(5)
par(mfrow=c(3,2))
for(i in seq(2,7,1)){
  cluster <- kmeans(subdata, centers = i)
  with(subdata, plot(p75, p37, col = cluster$cluster + 2, main = paste0("k=", i)))
  points(cluster$centers, bg=3:4, pch =21, cex=1.8, lwd =2)
  ari[i-1] = adjustedRandIndex(cluster$cluster, subdata$digit)
}

ari
## [1] 0.9974059 0.8089484 0.7603472 0.6989755 0.6748412 0.6573466

Compute the adjusted Rand indices for these clustering results, by comparing them with the true class labels. Does this unsupervised learning method do a good job for the supervised data set here, in particular when K=2?

In the particular when k =2 the ari is have a reletive high score 0.995 which inidcate the unsupervised learning method did a good job, when K increase it might cause overfitted.

4.Redo Question 3, using instead each of the four linkage methods: “complete”, “single”, “average” and “centroid”

Complete

# Calculate the distance matrix
d <- dist(subdata)
arihc <- double(5)
method <- c("complete", "single", "average", "centroid")
par(mfrow=c(2,3))
for (m in method){
   # Perform hierarchical clustering with the specified method
  hc <- hclust(d, method = m)
  
  for (k in 2:7){
    # cluster labels of oberservations 2 to 7 
    trees <- cutree(hc, k)
    arihc[k-1] <- adjustedRandIndex(subdata$digit, trees)
    # cat("Mehtod: ", m, " - K =", k, " ARI = ", round(arihc, 4), "\n")
    with(subdata, plot(p75, p37, col = cluster$cluster + 2, main = paste0("Mehtod = ", m, " k=", k)))
  points(cluster$centers, bg=3:4, pch =21, cex=1.8, lwd =2)
  }
  cat("Method: ", m, "ARI = ", arihc, "\n")
}

## Method:  complete ARI =  1 0.820832 0.7816271 0.703652 0.6996163 0.6819863

## Method:  single ARI =  1 0.9985977 0.9958 0.9944029 0.9930074 0.9916137

## Method:  average ARI =  1 0.8286265 0.7300534 0.6908914 0.6739876 0.6739407

## Method:  centroid ARI =  1 0.9958034 0.9618914 0.961851 0.7855862 0.6887279

5.Produce a scatter plot for each of the following 6 partitioning results with 2 classes/clusters:

# Create a new dataset with jittered values (sd = 0.1) to avoid overplotting
jittered_data <- as.data.frame(lapply(subdata, function(x) {
  if (is.numeric(x)) jitter(x, factor = 0.1) else x
}))
class labels
plot(jittered_data$p75, jittered_data$p37, col = as.factor(data$digit), pch = as.numeric(as.factor(data$digit)), 
     main = "True Class Labels", xlab = "P75", ylab = "P37")

#### K-means

kmeans_result <- kmeans(subdata, centers = 2)
plot(jittered_data$p75, jittered_data$P37, col = kmeans_result$cluster, pch = kmeans_result$cluster + 1, 
     main = "K-means Clustering", xlab = "P75", ylab = "P37")

complete linkage
hc_complete <- hclust(d, method = "complete")
clusters_complete <- cutree(hc_complete, k = 2)
plot(jittered_data$p75, jittered_data$p37, col = clusters_complete, pch = clusters_complete + 1, 
     main = "Complete Linkage", xlab = "P75", ylab = "P37")

##### single linkage

hc_single <- hclust(d, method = "single")
clusters_single <- cutree(hc_single, k = 2)
plot(jittered_data$p75, jittered_data$p37, col = clusters_single, pch = clusters_single + 1, 
     main = "Single Linkage", xlab = "P75", ylab = "P37")

Average Linkage
hc_average <- hclust(d, method = "average")
clusters_average <- cutree(hc_average, k = 2)
plot(jittered_data$p75, jittered_data$p37, col = clusters_average, pch = clusters_average + 1, 
     main = "Average Linkage", xlab = "P75", ylab = "P37")

Centroid Linkage
hc_centroid <- hclust(d, method = "centroid")
clusters_centroid <- cutree(hc_centroid, k = 2)
plot(jittered_data$p75, jittered_data$p37, col = clusters_centroid, pch = clusters_centroid + 1, 
     main = "Centroid Linkage", xlab = "P75", ylab = "P37" )

Do you think the ARI values make sense?

Base on the result plot, that ARI is make sense, because when K =2 all kind of cluster are having similar performance therefore provide the ARI correctness.

Support Vector Machines

split and train

train_data <- subdata[1:500,]
test_data <- subdata[501 :nrow(subdata), ]

Train support vector machines using the linear kernel, for cost = 0.001, 0.01, 0.1, 1, 10, 100, respectively.

Produce a classification plot for each value of cost, which also shows the decision boundary. You can either use the plot function provided in the e1071 package or write your own code (perhaps similar to mine). Also, add some jittering to the data (with sd=0.1, say), and also show the support vectors, at the same locations as the jittered observations that they are associated with.

for (i in c(0.001, 0.01, 0.1, 1, 10, 100)){
  r = svm(digit ~ ., data= train_data, scale = F, kernel = "linear", cost = i)
  plot(r, train_data)
  mtext(paste0("cost", i))
  yhat = predict(r,newdata = test_data)
  # Create a confusion matrix to compare the actual 'class' labels from the test data with the predicted class labels from the Naive Bayes mode
  print(table(test_data$digit, yhat))
  
  # Calculate the accuracy of the model by checking the proportion of correct predictions
  A = mean(test_data$digit == yhat)
  cat(paste0("cost = ",i, " Accuracy rate ", A, '\n'))
}

##    yhat
##       6   8
##   6 562   0
##   8 390  90
## cost = 0.001 Accuracy rate 0.625719769673704

##    yhat
##       6   8
##   6 544  18
##   8  64 416
## cost = 0.01 Accuracy rate 0.921305182341651

##    yhat
##       6   8
##   6 532  30
##   8  45 435
## cost = 0.1 Accuracy rate 0.928023032629559

##    yhat
##       6   8
##   6 526  36
##   8  37 443
## cost = 1 Accuracy rate 0.929942418426104

##    yhat
##       6   8
##   6 526  36
##   8  36 444
## cost = 10 Accuracy rate 0.930902111324376

##    yhat
##       6   8
##   6 526  36
##   8  36 444
## cost = 100 Accuracy rate 0.930902111324376

What is the effect of cost here, on the decision boundary and the number of support vectors?

From the plot above, that the cost indicate the accuracy rate, higher accuracy rate come with higher cost value.

7.Compute the training and test errors (misclassification rates) for each of the support vector machines found in Question 6.

From output Q6 that the accuracy rate have a major gap between cost 0.001 with the rest, which indicates that with cost 0.01 SVM are able to classify over 90% of class.

8.Consider using radial kernels for support vector machines. With cost=1 held fixed, train support vector machines, for gamma = 0.001, 0.01, 0.1, 1, 10, 100, respectively.

for (i in c(0.001, 0.01, 0.1, 1, 10, 100)){
  r = svm(digit ~ ., data= train_data, scale = F, kernel = "radial", cost = 1, gamma = i)
  plot(r, train_data )
  mtext(paste0("gamma", i))
  yhat = predict(r,newdata = test_data)
  # Create a confusion matrix to compare the actual 'class' labels from the test data with the predicted class labels from the Naive Bayes mode
  print(table(test_data$digit, yhat))
  # Calculate the accuracy of the model by checking the proportion of correct predictions
  A = mean(test_data$digit == yhat)
  cat(paste0("gamma = ",i, " Accuracy rate ", A, '\n'))
}

##    yhat
##       6   8
##   6 559   3
##   8 158 322
## gamma = 0.001 Accuracy rate 0.845489443378119

##    yhat
##       6   8
##   6 537  25
##   8  53 427
## gamma = 0.01 Accuracy rate 0.925143953934741

##    yhat
##       6   8
##   6 529  33
##   8  38 442
## gamma = 0.1 Accuracy rate 0.931861804222649

##    yhat
##       6   8
##   6 529  33
##   8  36 444
## gamma = 1 Accuracy rate 0.933781190019194

##    yhat
##       6   8
##   6 529  33
##   8  35 445
## gamma = 10 Accuracy rate 0.934740882917466

##    yhat
##       6   8
##   6 525  37
##   8  32 448
## gamma = 100 Accuracy rate 0.933781190019194

What is the effect of gamma here, on the decision boundary and the number of support vectors?

The gamma effect helps improve the performance compare to the change the cost.

Do you think using the radial kernel helps here, as compared with the linear kernel?

Yes, the radial kernel helps, from the result that initial accuracy rate is improve a lot, but with the gamma increase the performance till gamma = 10 reach to the peak, then drop down.

10.Now consider using all 256 predictors. Find the best values for cost and gamma based on 10-fold cross-validation (just one run) from the train set.

train_256 <- zip[1:500,]
test_256 <- zip[501 :nrow(zip), ]
values <- c(0.001, 0.01, 0.1, 1, 10, 100)
rt = tune(svm, digit ~ ., data=train_256, kernel="radial", scale=FALSE, ranges = list(cost = values, gamma = values),validation.x=10 )
summary(rt)  

model <-rt$best.model
# Make predictions on the train set
yhat_train <- predict(model, newdata = train_256)
# Calculate and print accuracy on the training set
train_accuracy <- 1- mean(train_256$digit == yhat_train)
cat("Training Set Accuracy: ", sprintf("%.10f", train_accuracy), "\n")

# Make predictions on the test set
yhat_test <- predict(rt$best.model, newdata = test_256)

# Calculate and print accuracy on the test set
test_accuracy <- 1- mean(yhat_test == test_256$digit)
cat("Test Set Accuracy: ", sprintf("%.10f", test_accuracy), "\n")
Parameter tuning of ‘svm’:

- sampling method: 10-fold cross validation 

- best parameters: 
    cost gamma
    10   0.01

- best performance: 1.615847 

- Detailed performance results:
          cost  gamma error  dispersion
          
          1e-03 1e-03   8.720153    0.8214717   
          1e-02 1e-03   8.576763    0.8004872   
          1e-01 1e-03   7.482756    0.6376604   
          1e+00 1e-03   3.950381    0.7507093   
          1e+01 1e-03   2.594183    0.8554863   
          1e+02 1e-03   2.120036    0.6751115   
          1e-03 1e-02   8.685074    0.8179124   
          1e-02 1e-02   8.242179    0.7692930   
          1e-01 1e-02   5.556555    0.6460357   
          1e+00 1e-02   2.088004    0.7254578   
          1e+01 1e-02   1.615847    0.6433051   
          1e+02 1e-02   1.621082    0.6415757   
          1e-03 1e-01   8.728804    0.8223642   
          1e-02 1e-01   8.662643    0.8097531   
          1e-01 1e-01   8.143349    0.7247699   
          1e+00 1e-01   7.380374    0.8001499   
          1e+01 1e-01   7.009282    0.8914761   
          1e+02 1e-01   7.009282    0.8914761   
          1e-03 1e+00   8.736279    0.8236107   
          1e-02 1e+00   8.735986    0.8217589   
          1e-01 1e+00   8.734875    0.8037311   
          1e+00 1e+00   8.744292    0.7314770   
          1e+01 1e+00   8.576852    0.7594888   
          1e+02 1e+00   8.576852    0.7594888   
          1e-03 1e+01   8.736402    0.8236676   
          1e-02 1e+01   8.737213    0.8223241   
          1e-01 1e+01   8.746837    0.8090230   
          1e+00 1e+01   8.843988    0.7622132   
          1e+01 1e+01   8.765956    0.8204965   
          1e+02 1e+01   8.765956    0.8204965   
          1e-03 1e+02   8.736402    0.8236676   
          1e-02 1e+02   8.737213    0.8223241   
          1e-01 1e+02   8.746837    0.8090231   
          1e+00 1e+02   8.843989    0.7622142   
          1e+01 1e+02   8.765958    0.8204991   
          1e+02 1e+02   8.765958    0.8204991   
          
Training Set Accuracy:  1.0000000000 
Test Set Accuracy:  1.0000000000           

Random Forest and Variable Selection

Clustering

Support Vector Machines (SVM)

Cross-Validation with All Predictors